library(readxl)
library(ggplot2)
library(ggforce)
library(tidyverse)

fpth <- file.path("new plots", "JH_firmtype_625_1.0.xlsx")
df <- read_excel(fpth, sheet = "long")

df$m <- 0
df$m[df$firmtype == "PD"] <- df$prob_inlanddomestic[df$firmtype == "PD"]
df$m[df$firmtype == "CI"] <- df$prob_coastexporter[df$firmtype == "CI"]
df$m[df$firmtype == "CD"] <- df$prob_coastexporter[df$firmtype == "CD"]
df$m[df$firmtype == "PI"] <- df$prob_coastexporter[df$firmtype == "PI"]
df$type = "Exit"
df$type[df$firmtype == "PD"] <- "Periphery-Domestic"
df$type[df$firmtype == "PI"] <- "Periphery-International"
df$type[df$firmtype == "CI"] <- "Core-International"
df$type[df$firmtype == "CD"] <- "Core-Domestic"
phi_del <- read_excel(fpth) 
phi_del$qt <- seq_len(nrow(phi_del))
df$phi <- round(df$phi, 4)
df$del <- round(df$del, 4)
phi_del$phi <- round(phi_del$phi, 4)
phi_del$del <- round(phi_del$del, 4)
df <- merge(df, phi_del[, c("phi", "qt")], by = "phi")
names(df)[names(df) == "qt"] <- "phi_qt"
df <- merge(df, phi_del[, c("del", "qt")], by = "del")
names(df)[names(df) == "qt"] <- "del_qt"


# df$alp <- 0.1
# df$alp[df$type == "Core-International"] <- 1
# df$alp[df$type == "Core-Domestic"] <- 0.95
# df$alp[df$type == "Periphery-Domestic"] <- 0.95


# draw line between cd and ci 
phi_pts <- c()
for (del in unique(df$del_qt)) {
    v1 <- min(df[df$del_qt == del & df$firmtype == "CI", "phi_qt"])
    v2 <- max(df[df$del_qt == del & df$firmtype == "CD", "phi_qt"])
    phi_pts <- c(phi_pts, mean(v1, v2))
}
# phi_pts[phi_pts > 1] <- phi_pts[phi_pts > 1] - 0.5
del_pts_grid <- unique(df$del_qt)

# draw line between cd and ci 
del_pts <- c()
for (phi in unique(df$phi_qt)) {
    v1 <- min(df[df$phi_qt == phi & df$firmtype == "CI", "del_qt"])
    v2 <- max(df[df$phi_qt == phi & df$firmtype == "CD", "del_qt"])
    del_pts <- c(del_pts, mean(v1, v2))
}
# del_pts[del_pts > 1] <- del_pts[phi_pts > 1] - 0.5
phi_pts_grid <- unique(df$phi_qt)
lines_1 <- data.frame(del_qt = del_pts, phi_qt = phi_pts_grid)
lines_2 <- data.frame(del_qt = del_pts_grid, phi_qt = phi_pts)
lines_cd <- unique(union(lines_1, lines_2))
lines_cd <- lines_cd[order(lines_cd$del_qt, -lines_cd$phi_qt), ]
lines_cd$del_qt <- lines_cd$del_qt - 0.5
lines_cd$phi_qt <- lines_cd$phi_qt - 0.5

# draw line between pd and cd
phi_pts <- c()
for (del in unique(df$del_qt)) {
    v1 <- min(df[df$del_qt == del & df$firmtype == "CD", "phi_qt"])
    v2 <- max(df[df$del_qt == del & df$firmtype == "PD", "phi_qt"])
    phi_pts <- c(phi_pts, mean(v1, v2))
}
# phi_pts[phi_pts > 1] <- phi_pts[phi_pts > 1] - 0.5
del_pts_grid <- unique(df$del_qt)

# draw line between pd and cd 
del_pts <- c()
for (phi in unique(df$phi_qt)) {
    v1 <- min(df[df$phi_qt == phi & df$firmtype == "CD", "del_qt"])
    v2 <- max(df[df$phi_qt == phi & df$firmtype == "PD", "del_qt"])
    del_pts <- c(del_pts, mean(v1, v2))
}
# del_pts[del_pts > 1] <- del_pts[phi_pts > 1] - 0.5
phi_pts_grid <- unique(df$phi_qt)
lines_1 <- data.frame(del_qt = del_pts, phi_qt = phi_pts_grid)
lines_2 <- data.frame(del_qt = del_pts_grid, phi_qt = phi_pts)
lines_ci <- unique(union(lines_1, lines_2))
lines_ci <- lines_ci[order(lines_ci$del_qt, -lines_ci$phi_qt), ]
lines_ci$del_qt <- lines_ci$del_qt - 0.5
lines_ci$phi_qt <- lines_ci$phi_qt - 0.5

max_phi <- max(df[df$firmtype == "PD", "phi_qt"])+1
max_del <- max(df[df$firmtype == "PD", "del_qt"])+1

lines_ci <- lines_ci[lines_ci$del_qt <= max_del & lines_ci$phi_qt <= max_phi, ]


var <- "dirtyinput"
out_path <- file.path("new plots")
scale_range <- c(1.5, 20)

###################################
##### BELOW USED TO BE IN FUNCTION
###################################
# MIGHT NOT NEED THIS
df$m[df$type == "Exit"] <- 0

# CREATE FILL VARIABLES (TOP AND BOTTOM 10%)
df$fill_var <- 0
vals <- quantile(df[df$type != "Exit", var], probs = seq(0, 1, 0.1))[c(2, 10)]
# pvals <- quantile(df[df$firmtype %in% c("PD", "PI"), var], probs = seq(0, 1, 0.1))[c(2, 9)]
pval <- order(df[df$firmtype %in% c("PD", "PI"), var], decreasing = TRUE)[30]
pval <- df[df$firmtype %in% c("PD", "PI"), var][pval]
# cvals <- quantile(df[df$firmtype %in% c("CD", "CI"), var], probs = seq(0, 1, 0.1))[c(2, 10)]
cval <- order(df[df$firmtype %in% c("CD", "CI"), var], decreasing = TRUE)[30]
cval <- df[df$firmtype %in% c("CD", "CI"), var][cval]
df$fill_var[df$type != "Exit" & df[, var] > vals[2]] <- 1 
df$fill_var[df$type != "Exit" & df[, var] < vals[1]] <- 1


# SHADE IN EXITING FIRMS AREA
m <- df[df$type == "Exit", c('phi_qt', 'del_qt')]
m$phi_qt <- m$phi_qt * 4
m$del_qt <- m$del_qt * 4
hull <- chull(m) #create coordinates of convex hull
coords <- m[ c( hull, hull[1]), ]
# LABEL THE EXITING FIRMS
exit_x <- mean(df[df$type == "Exit", "del_qt"]) * 4
exit_y <- mean(df[df$type == "Exit", "phi_qt"]) * 4


# SHADE IN THE TOP 20% OF PERIPHERY FIRMS 
m_p <- df[df$firmtype %in% c("PD", "PI") & df[, var] >= pval, c('phi_qt', 'del_qt')]
m_p$phi_qt <- m_p$phi_qt * 4
m_p$del_qt <- m_p$del_qt * 4
hull <- chull(m_p) #create coordinates of convex hull
coords_p <- m_p[c( hull, hull[1]), ]
m_p$id <- "30 Largest\nPeriphery Firms"
m_p$x0 <- 30
m_p$y0 <- 80

# LABEL TOP 10% FIRMS AREA
lb_py <- mean(m_p$phi_qt)
lb_px <- mean(m_p$del_qt)

# SHADE IN THE TOP 10% OF CORE FIRMS 
m_c <- df[df$firmtype %in% c("CD", "CI") & df[, var] >= cval, c('phi_qt', 'del_qt')]
m_c$phi_qt <- m_c$phi_qt * 4
m_c$del_qt <- m_c$del_qt * 4
hull <- chull(m_c) #create coordinates of convex hull
coords_c <- m_c[c(hull, hull[1]), ]
m_c$id <- "30 Largest\nCore Firms"
m_c$x0 <- 85
m_c$y0 <- 60

# LABEL TOP 10% FIRMS AREA
lb_cy <- mean(m_c$phi_qt)
lb_cx <- mean(m_c$del_qt)

m <- rbind(m_p, m_c)

df <- merge(df, m, by = c("phi_qt", "del_qt"), all.x = T)

firm_plot <- function(var, scale_range, out_path, trans, q_lab) {
    # CREATE LEGEND ORDER
    ordered_leg <- c("Exit", "Periphery-Domestic",
        "Periphery-International", "Core-Domestic", "Core-International")

    color_leg <- c("Exit" = 'grey', "Periphery-Domestic" = "#d9d9fe",
                    "Periphery-International" = "#0F2080", "Core-Domestic" = "#7ac77f", #"#A95AA1" ,
                    "Core-International" = "#cc3137")
    shape_leg <- c("Exit" = 21, "Periphery-Domestic" = 23, "Periphery-International" = 8,
                    "Core-Domestic" = 22, "Core-International" = 24)

    q_lab <- paste0(q_lab, ":")
    # get quantiles of size of dots
    qs <- quantile(df[df$type != "Exit", var])

    # MAKE THE PLOT
    ggplot(df[df$type != "Exit", ], # DO NOT PLOT EXITING FIRMS
            aes(del_qt * 4, phi_qt * 4, fill = factor(type),
                size = .data[[var]], color = type)) + #shape = type, 
        geom_point(stroke = 1.25) +
        # geom_mark_hull(data = df[!(df$type %in% c("Exit", "Core-International")), ],
        #     aes(x = 4*del_qt, y = 4*phi_qt, color = type, fill = NULL),
        #     concavity = 2, expand = unit(2.5, "mm")) + 
        geom_path(data = lines_cd, aes(del_qt*4, phi_qt*4), linetype = "dashed",
                inherit.aes = F, size = 1.2, color = "#b00f14") +
        geom_path(data = lines_ci, aes(del_qt*4, phi_qt*4), linetype = "dashed",
                inherit.aes = F, size = 1.2, color = "forestgreen") +
        geom_mark_hull(data = m,
            aes(x0 = x0, y0 = y0, x = del_qt, y = phi_qt,
                label = id, fill = id),
            alpha = .25, inherit.aes = F, concavity = 2,
            expand = unit(2.5, "mm"),
            con.border = "all",
            # con.colour = "#A95AA1",
            linetype = 0,
            label.fontsize = 14,
            con.arrow = arrow(length = unit(3, "mm")),
            label.margin = margin(1, 1, 1, 1, "mm"),
            con.size = 0.75, con.cap = 0) +
        scale_size(labels = names(qs),
            breaks = qs,
            range = scale_range, trans = trans,
            guide = guide_legend(override.aes = list(
                    shape = 16, # c(23, 8, 22, 24, 23),
                    color = "black", fill = "black"))) + # RESCALE POINTS
        scale_color_manual(values = color_leg,
            breaks = ordered_leg,
            guide = guide_legend(override.aes = list(size = 4))) +
        scale_fill_manual(values = color_leg,
            breaks = ordered_leg) +
        scale_shape_manual(values = shape_leg,
            breaks = ordered_leg) +
        # SHADE IN THE EXITING FIRMS
        geom_polygon(data = coords, aes(del_qt, phi_qt), color = "darkgrey",
                    size = 1, linetype = "dashed", #fill = NULL, 
                    alpha = 0.0, inherit.aes = F) +
        theme_bw() + guides(fill = "none", alpha ='none') + #size = "none", 
        annotate("text", x = exit_x, y = exit_y,
            label = "Exit", size = 5) + # LABEL EXITING FIRMS
        annotate("label", x = 49, y = 25, color = "#9595f6", 
            label = "Periphery\nDomestic", size = 5) + # LABEL Periphery Domestic FIRMS
        annotate("label", x = 75, y = 25, color = 'forestgreen',
            label = "Core Domestic", size = 5) + # LABEL Periphery Domestic FIRMS
        annotate("label", x = 58, y = 100, color = "#b00f14", 
            label = "Core International", size = 5) + # LABEL Periphery Domestic FIRMS
        labs(x = "Size", y = "Efficiency",
            size = q_lab, shape = NULL, color = NULL) +
        theme(legend.position="bottom", legend.box = "vertical",
            legend.box.margin = margin(6, 6, 6, 6),
            text = element_text(size = 18),
            axis.title = element_text(size = 18),
            axis.text = element_text(size = 18),
            legend.title = element_text(size = 16))

    # SAVE FIGURES
    fn_svg <- paste0(var, "_qt_625_1.svg")
    fn_pdf <- paste0(var, "_qt_625_1.pdf")
    ggsave(file.path(out_path, fn_svg), device = "svg",
            dpi = 320, width = 8, height = 8, units = "in")
    ggsave(file.path(out_path, fn_pdf), device=cairo_pdf,
            family="Arial Unicode MS", dpi = 320,
            width = 8, height = 8, units = "in")
}

#  emission
#  intensity
#  dirtyinput
#  abate_ttlexp_share
#  dxratio
VARIABLES <- c("emission", "intensity", "dirtyinput",
                "abate_ttlexp_share", "dxratio", "m")

scales <- list("emission" = c(0.5, 6), #c(1.5, 20),
                "intensity" = c(0.5, 5.5), #c(0.5, 12),
                "dirtyinput" = c(0.5, 6), #c(1.5, 20),
                "abate_ttlexp_share" = c(0.5, 5.5), #12
                "dxratio" = c(0.5, 5.5),#c(0.5, 8))
                "m" = c(0.25, 4.5)) 

trans_list <- list("emission" = "log1p", "intensity" = "identity",
    "dirtyinput" = "log1p", "abate_ttlexp_share" = "identity",
    "dxratio" = "identity", 'm' = 'log1p')

for (variable in VARIABLES) {
    print(variable)
    scale_range <- scales[[variable]]
    trans <- trans_list[[variable]]
    if (variable == "m") {
        q_lab <- "Max Location Prob."
    } else {
        q_lab <- "Quantile"
    }
    firm_plot(variable, scale_range, out_path, trans, q_lab)
}
